Packages set up

load data sets

#data(field)
#dim(field)
# head(field)
data(veggyraw)
data(veggy)

This dataset is a set of pixel count measurements to characterize green and red area per pot.

Example image of germinating plants and red flags of those unsuccessful pots

Example image of germinating plants and red flags of those unsuccessful pots

Verify replicability

## check duplicated image pots
veggyraw=mutate(veggyraw, indexrep=paste(site,qp,pos,day,sep='_'))

## Number of pots used for replicability analysis
dim(veggyraw[duplicated(veggyraw$indexrep),]) # There are 790 in total from both experiments
## [1] 790  13
unique(veggyraw[duplicated(veggyraw$indexrep),]$day)
##  [1] "2015-11-16" "2015-11-09" "2015-12-04" "2015-11-13" "2015-12-09"
##  [6] "2016-02-07" "2016-02-09" "2015-11-23" "2016-01-30" "2015-10-30"
## [11] "2015-11-27"
unique(veggyraw[duplicated(veggyraw$indexrep),]$qp)
##  [1] 105 109 141 164 168 181 190 193 201 231 232 233 249 257 260  31 330
## [18]  33  45  50  67
## Run the test for replicability
library(MCMCglmm)
## Loading required package: Matrix
## Loading required package: coda
## Loading required package: ape
lmm=MCMCglmm(data=veggyraw[duplicated(veggyraw$indexrep),], countgreen ~1, random = ~ indexrep, family = 'poisson',verbose=F)
print ( h2MCMC(lmm,randomname = 'indexrep') )
## $Mode
##      var1 
## 0.9974931 
## 
## $HPD
##          lower     upper
## var1 0.9957086 0.9984387
## attr(,"Probability")
## [1] 0.95

Clean data by removing duplicate records from duplicated images

## Produce cleaner data than veggyraw
## Since there are duplicates but the replicability is >99%, generate average of
## counts for two pots of the same identity (can be due to two pictures per
## tray/day). Necessary for merge with master dataset later

# veggy %>% mutate( indexrep=paste(site,qp,pos,day,sep='_')) %>%
#           group_by(indexrep) %>% summarise(site=unique(site),
#                                                qp=unique(qp),
#                                                pos=unique(pos),
#                                                trayid=unique(trayid),
#                                                rep=unique(rep),
#                                                indpop=unique(indpop),
#                                                water=unique(water),
#                                                id=unique(id),
#                                                day=unique(day),
#                                                countgreen=mean(countgreen),
#                                                countred=mean(countred)
#                                            ) ->veggy
# veggy %>% head()
# veggy %>% tail()
# dim(veggy)
# 
# veggy= veggy %>% mutate( potindex=paste(site,qp,pos,id,sep="_")) %>% select(-indexrep)
# 
# devtools::use_data(veggy,overwrite = T) # Save the data for the package

data(veggy)
dim(veggy)

This is not run because takes some time, instead I load the data already produced

Detecting unsuccessful pots

# just get the total number of red points
red=veggy %>% group_by(site,qp,pos,trayid,rep,indpop,water,id) %>% summarize(redsum=sum(countred)) %>% mutate(identifier=paste(sep="_",site,qp,pos)) 
# calculate variance across groups by establishing different threshold values
fvals<-sapply(seq(1e4,1e6,by=1000),function(x){
    summary(aov(red$redsum ~ red$redsum >x))[[1]][["F value"]][1]
})

plot(fvals ~ seq(1e4,1e6,by=1000))

foundthreshold= seq(1e4,1e6,by=1000)[which(fvals==max(fvals,na.rm=T))] # =152000
print(foundthreshold)
## [1] 152000
# generate a bad flag column
table(red$redsum > foundthreshold)
## 
## FALSE  TRUE 
## 22780  1967
badflags = red %>% filter(redsum > foundthreshold) %>% select(identifier)
## Adding missing grouping variables: `site`, `qp`, `pos`, `trayid`, `rep`, `indpop`, `water`
badflags=unique(badflags$identifier)
length(badflags)
## [1] 1967

By calculating F statistic, we calculate the variance between two groups of pots whose pixels are counted. For that several thresholds are tried, the one that separates better the two distribution is the one that will be used

Modeling trajectories of germination and growth

Just to have the number of green counts:

# get total number of green counts
green=veggy %>% group_by(site,qp,pos,trayid,rep,indpop,water,id) %>%
  summarize(greensum=sum(countgreen)) %>%
  mutate(identifier=paste(sep="_",site,qp,pos))

badflagsgreen=green %>% filter(greensum < 1e4) %>% select(identifier) # just in case it is needed later
## Adding missing grouping variables: `site`, `qp`, `pos`, `trayid`, `rep`, `indpop`, `water`
green=green%>%select(-identifier)

Model all trajectories as sigmoidal functions:

veg<-
  veggy %>%
  # head(1e4) %>% # for profiling

# a couple of filters

  mutate(identifier=paste(sep="_",site,qp,pos)) %>% # get an indentifier variable
  filter( ! identifier  %in% badflags) %>% # remove those pots with bad identifiers
  # filter( ! identifier  %in% badflagsgreen) %>% # filter if there is not enough green

  
# select early positions

  mutate(starting=startday(site)) %>% # add the start day of the experiment ina per row basis for later calculations
  mutate(daycount= fn(day - as.Date(starting) ) )%>% #  trick to filter the 40 first dates depending on experiment
  filter(daycount <60) %>% # 40 days because we started the thinning like 1 month after they started germination, probably would be better to do it in a per pot basis.

  group_by(site,qp,pos,trayid,rep,indpop,water,id) %>% # group observations by pot to analyse each time series

# calculate several trajectory informations
  
  summarize(
            ger.a=fitsigmoid(countgreen,daycount,parameter='a'),
            ger.b=fitsigmoid(countgreen,daycount,parameter='b'),
            ger.c=fitsigmoid(countgreen,daycount,parameter='c'),
            firstgreen=firstgreen(y=countgreen,x=daycount),  # first green pixels is kind of arbitrary
            lin.0=fitlinear(countgreen,daycount,'significance')  # Probably the regression one is not so nice.

            )



## Get the
# merge with total counts of red and green
veg=veg %>%
  full_join(.,red,by=c('site','qp','pos','trayid','rep','indpop','water','id')) %>%
  full_join(.,green,by=c('site','qp','pos','trayid','rep','indpop','water','id'))

devtools::use_data(veg,overwrite = T)

data(veg)

Plotting the sigmoidal curves of 1000 pots

vegh<-head(veg,1000) %>% filter(ger.a != "NA")

plot( ylab='Predicted # pixels',xlab='Days of experiment',
  ylim=c(0,50000),
  y=
getsigmoid(
  a= substrRight(vegh$ger.a,lastpos = 4,giveright = F)[2],
  b= substrRight(vegh$ger.b,lastpos = 4,giveright = F)[2],
  c= substrRight(vegh$ger.c,lastpos = 4,giveright = F)[2]
),
x=1:40,
type='l'
)
for( i in 1:nrow(vegh)){
lines(  y=
getsigmoid(
  a= substrRight(vegh$ger.a,lastpos = 4,giveright = F)[i],
  b= substrRight(vegh$ger.b,lastpos = 4,giveright = F)[i],
  c= substrRight(vegh$ger.c,lastpos = 4,giveright = F)[i]
),
x=1:40,
col=transparent('black')
)
}

<<<<<<< HEAD

## R version 3.3.2 (2016-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.5 LTS
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=de_DE.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=de_DE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=de_DE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] MCMCglmm_2.23      ape_3.5            coda_0.18-1       
##  [4] Matrix_1.2-7.1     field_0.0.0.9000   moiR_0.0.1        
##  [7] RColorBrewer_1.1-2 devtools_1.12.0    cowplot_0.7.0     
## [10] ggplot2_2.2.1      dplyr_0.5.0        knitr_1.16        
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.10        plyr_1.8.4          tools_3.3.2        
##  [4] digest_0.6.12       evaluate_0.10       memoise_1.1.0      
##  [7] tibble_1.2          gtable_0.2.0        nlme_3.1-129       
## [10] lattice_0.20-34     DBI_0.6-1           commonmark_0.9     
## [13] yaml_2.1.14         withr_1.0.2         stringr_1.2.0      
## [16] roxygen2_5.0.1.9000 xml2_1.0.0          rprojroot_1.2      
## [19] grid_3.3.2          R6_2.2.0            rmarkdown_1.5.9000 
## [22] tensorA_0.36        corpcor_1.6.8       tidyr_0.6.1        
## [25] magrittr_1.5        backports_1.0.5     scales_0.4.1       
## [28] htmltools_0.3.6     assertthat_0.1      cubature_1.1-2     
## [31] colorspace_1.2-7    labeling_0.3        stringi_1.1.3      
## [34] lazyeval_0.2.0      munsell_0.4.3

=======

Calculate various variance proportions of the modeled trajectories.

library(lme4)
library(MCMCglmm)

veg %>%
  filter(redsum < 1e4) %>%
  # filter(greensum >1e4) %>%
  mutate(
  a= removetail(ger.a,4),
  b= removetail(ger.b,4),
  c= removetail(ger.c,4)
  ) %>%
lmer(formula = a ~ (1|id) + (1|water)+ (1|site) + (1|indpop)  ) -> lmod 
lmod %>% randomvariance(.,var =c('id','water','site',"indpop") )
##             [,1]
## [1,] 0.003424558
## [2,] 0.002360285
## [3,] 0.000000000
## [4,] 0.048074078
veg %>%
  filter(redsum < 1e4) %>%
  # filter(greensum >1e4) %>%
  mutate(
  a= removetail(ger.a,4),
  b= removetail(ger.b,4),
  c= removetail(ger.c,4)
  ) %>%
lmer(formula = b ~ (1|id) + (1|water)+ (1|site) + (1|indpop)  ) -> lmod
lmod %>% randomvariance(.,var =c('id','water','site',"indpop") )
##              [,1]
## [1,] 0.0008525735
## [2,] 0.0017037188
## [3,] 0.0398655522
## [4,] 0.3056023807
veg %>%
  filter(redsum < 1e4) %>%
  # filter(greensum >1e4) %>%
  mutate(
  a= removetail(ger.a,4),
  b= removetail(ger.b,4),
  c= removetail(ger.c,4)
  ) %>%
lmer(formula = c ~ (1|id) + (1|water)+ (1|site) + (1|indpop)  ) -> lmod 
lmod %>% randomvariance(.,var =c('id','water','site',"indpop") )
##             [,1]
## [1,] 0.001048828
## [2,] 0.003458954
## [3,] 0.074997945
## [4,] 0.336709837

Not very satisfactory. Most of the effect is about individual versus population replicates

Calculate variance in a per-experiment treatment basis

veg %>%
  filter(redsum < 1e4) %>%
  # filter(greensum >1e4) %>%
  mutate(
  a= removetail(ger.a,4),
  b= removetail(ger.b,4),
  c= removetail(ger.c,4),
  l=removetail(lin.0)
  ) %>%
  filter(water=='h', indpop=='i', site=='madrid') %>%
# lmer(formula = b ~ (1|id) ) -> lmod 
# lmer(formula = a ~ (1|id) ) -> lmod 
# lmer(formula = c ~ (1|id) ) -> lmod 
# lmer(formula = greensum ~ (1|id) ) -> lmod
# lmer(formula = redsum ~ (1|id) ) -> lmod
lmer(formula = l ~ (1|id) ) -> lmod
## Warning in fn(substrRight(x, lastpos = 8, giveright = F)): NAs introduced
## by coercion
lmod %>% randomvariance(.,var =c('id') )
##              [,1]
## [1,] 2.533265e-14